Data-Scientists1.png

In [ ]:
# !conda install -c plotly plotly -y
In [1]:
# Importando as bibliotecas necessárias
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
import plotly
import plotly.graph_objs as go
import plotly.express as px
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.tools import FigureFactory as FF
from IPython.core.display import display, HTML
import warnings
import operator

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size as (width, height).
rcParams['figure.figsize'] = 7.5, 6
# Use seaborn's 'whitegrid' style for the plots.
sns.set_style('whitegrid')
# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')
# Initialise Plotly's offline notebook mode.
init_notebook_mode(connected=True)
# Do not truncate the number of columns shown when displaying DataFrames.
pd.set_option('display.max_columns', None)
In [2]:
# Load the three survey CSV files: the multiple-choice answers, the question
# list, and the free-text ("other") answers.
mcq, question, text_response = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../bases/multiple_choice_responses.csv',
        '../bases/questions_only.csv',
        '../bases/other_text_responses.csv',
    )
)

Trabalho dos Entrevistados

In [3]:
# Horizontal bar chart of the most frequent answers to Q5 (the respondents'
# job, per the section heading).
TOP_N_ROLES = 9  # how many of the most frequent answers are plotted

# Series.value_counts() is used instead of the top-level pd.value_counts(),
# which is deprecated in recent pandas releases.
c = mcq['Q5'].value_counts()

# Keep the TOP_N_ROLES most frequent answers, then sort ascending so that the
# largest bar ends up at the top of the horizontal chart.
d = (
    c.head(TOP_N_ROLES)
    .rename('count')
    .to_frame()
    .sort_values('count', ascending=True)
)

plt.barh(d.index, d['count'])
plt.show()

Distribuição no Globo

In [4]:
# Keep only the respondents whose Q5 answer is exactly 'Data Scientist'.
is_data_scientist = mcq['Q5'] == 'Data Scientist'
df = mcq.loc[is_data_scientist]
In [5]:
# Number of Data Scientist respondents per Q3 answer; the answers are used as
# country names to locate each value on the map.
country_dist = df['Q3'].value_counts()

# World map shaded by respondent count.
fig = px.choropleth(country_dist.values, locations=country_dist.index,
                    locationmode='country names',
                    color=country_dist.values,
                    color_continuous_scale=px.colors.sequential.OrRd)
fig.update_layout(title="Distribuição dos Cientistas de Dados pelo Globo")

# Still export a standalone HTML copy, but without opening a browser tab.
plot(fig, filename='figure.html', auto_open=False)

# Render the figure inline. HTML('figure.html') would display the literal text
# "figure.html" (the first positional argument is the HTML payload, not a
# path), so the figure object is shown directly instead.
iplot(fig)

Detalhes dos Entrevistados

Por Gênero

In [6]:
# Bar chart of the Q2 answers (gender, per the section heading) among the
# Data Scientist respondents.
gen = df['Q2'].value_counts()
categories, totals = gen.index, gen.values

plt.bar(x=categories, height=totals)
plt.show()

Por Idade

In [7]:
# Line chart of the Q1 answers (age group, per the section heading) among the
# Data Scientist respondents.
#
# The counts are ordered by label instead of by frequency: plotted in
# value_counts() order the line is always decreasing and carries no
# information about age.
# NOTE(review): assumes the Q1 labels (e.g. '18-21', '22-24', ...) sort
# lexicographically in age order -- confirm against the data.
age_counts = df['Q1'].value_counts().sort_index()
texto = age_counts.index
valor = age_counts.values

plt.plot(texto, valor, '-ok')
plt.show()  # avoids echoing the list of Line2D objects as the cell output
Out[7]:
[<matplotlib.lines.Line2D at 0x7f058fdc1550>]

Por Escolaridade

In [8]:
# Pie chart of the Q4 answers (education level, per the section heading).
# Every answer with fewer than 760 respondents is merged into one 'Outros'
# slice, which is always appended last.
reg = df['Q4'].value_counts(sort=True)

is_minor = reg < 760
major = reg[~is_minor]

# Total of all the answers that fall below the threshold.
outros = reg[is_minor].sum()

texto = list(major.index) + ['Outros']
valor = list(major.values) + [outros]

plt.pie(valor, labels=texto, autopct='%1.1f%%', shadow=True, startangle=90)
plt.show()

Onde preferem estudar

In [9]:
# For each of the columns Q13_Part_1 .. Q13_Part_12, record the most frequent
# answer and how many times it occurs.
mooc = {}
for part in range(1, 13):
    counts = df['Q13_Part_' + str(part)].value_counts()
    mooc[counts.index[0]] = counts.values[0]

# Order the entries by count, ascending, so the largest bar is drawn on top.
mooc = dict(sorted(mooc.items(), key=lambda item: item[1]))

plt.barh(y=list(mooc), width=list(mooc.values()), color='#03396c')
plt.title('MOOC (Massive Open Online Courses)')
plt.show()
In [10]:
# Collect (most frequent answer, its count) for each of the columns
# Q12_Part_1 .. Q12_Part_12.
top_answers = []
for part in range(1, 13):
    counts = df['Q12_Part_' + str(part)].value_counts()
    top_answers.append((counts.index[0], counts.values[0]))

# Build the mapping, then re-order it by count (ascending) for the bar chart.
pub = dict(top_answers)
pub = dict(sorted(pub.items(), key=lambda item: item[1]))

plt.barh(y=list(pub), width=list(pub.values()))
plt.title('Páginas da Comunidade')
plt.show()

Atividades que realizam

In [11]:
# Value counts for each of the columns Q9_Part_1 .. Q9_Part_8.
part_counts = [
    df['Q9_Part_' + str(part)].value_counts()
    for part in range(1, 9)
]

# Most frequent answer of each column and the number of times it was given.
texto = [counts.index[0] for counts in part_counts]
valor = [counts.values[0] for counts in part_counts]

plt.barh(y=texto, width=valor)
plt.show()

Há quanto tempo em Atividade

In [12]:
# Bar chart of the Q15 answers (time in activity, per the section heading),
# in descending order of frequency.
q15_counts = df['Q15'].value_counts()
anos, valor = q15_counts.index, q15_counts.values

plt.bar(x=anos, height=valor)
# Vertical tick labels for the category names.
plt.xticks(rotation='vertical')
plt.show()

Como trabalha?

In [13]:
# Horizontal bar chart of how often each Q14 answer was given.
q14_counts = df['Q14'].value_counts()
tool, value2 = q14_counts.index, q14_counts.values

plt.barh(y=tool, width=value2)
plt.show()
In [14]:
# For each of the columns Q20_Part_1 .. Q20_Part_12, take the most frequent
# answer (`vis`) and how many times it was given (`value3`).
vis = []
value3 = []
for part in range(1, 13):
    # Count once per column instead of recomputing value_counts() twice.
    counts = df['Q20_Part_' + str(part)].value_counts()
    vis.append(counts.index[0])
    value3.append(counts.values[0])

plt.plot(vis, value3, '-ok')
plt.xticks(rotation='vertical')
# End with plt.show(), like the other plotting cells, so the return value of
# plt.xticks() is not echoed as the cell output.
plt.show()
Out[14]:
([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
 <a list of 12 Text major ticklabel objects>)

Até a próxima \ Fernando Anselmo